## Warning: Missing column names filled in: 'X1' [1]
# Fit a linear model of hiv_diagnoses on borough, gender, age and mid_income,
# after dropping rows where year is "2011" or age is "All", and print the
# model summary.
income_hiv %>%
  filter(year != "2011", age != "All") %>%
  lm(
    formula = hiv_diagnoses ~ borough + gender + age + mid_income,
    data = .
  ) %>%
  summary()
##
## Call:
## lm(formula = hiv_diagnoses ~ borough + gender + age + mid_income,
## data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.106 -3.702 -1.040 2.239 50.426
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.835e-01 3.024e-01 3.252 0.00115 **
## boroughBrooklyn 2.975e-01 2.807e-01 1.060 0.28922
## boroughManhattan 3.091e+00 3.313e-01 9.332 < 2e-16 ***
## boroughQueens -1.245e+00 2.588e-01 -4.811 1.53e-06 ***
## boroughStaten Island -4.376e+00 3.972e-01 -11.016 < 2e-16 ***
## genderMale 6.083e+00 1.515e-01 40.138 < 2e-16 ***
## age20 - 29 9.600e+00 2.625e-01 36.576 < 2e-16 ***
## age30 - 39 6.870e+00 2.625e-01 26.175 < 2e-16 ***
## age40 - 49 4.627e+00 2.625e-01 17.627 < 2e-16 ***
## age50 - 59 2.355e+00 2.625e-01 8.972 < 2e-16 ***
## age60+ 4.267e-01 2.625e-01 1.626 0.10406
## mid_income -1.238e-04 6.938e-06 -17.851 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.682 on 7764 degrees of freedom
## Multiple R-squared: 0.3594, Adjusted R-squared: 0.3585
## F-statistic: 395.9 on 11 and 7764 DF, p-value: < 2.2e-16
# Fit a linear model of hiv_diagnoses on borough, gender, race and mid_income,
# after dropping rows where year is "2011" or race is "All", and print the
# model summary.
income_hiv %>%
  filter(year != "2011", race != "All") %>%
  lm(
    formula = hiv_diagnoses ~ borough + gender + race + mid_income,
    data = .
  ) %>%
  summary()
##
## Call:
## lm(formula = hiv_diagnoses ~ borough + gender + race + mid_income,
## data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -18.319 -5.652 -1.628 2.949 84.026
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.514e+00 5.142e-01 2.945 0.00324 **
## boroughBrooklyn 3.570e-01 4.929e-01 0.724 0.46898
## boroughManhattan 3.710e+00 5.818e-01 6.376 1.95e-10 ***
## boroughQueens -1.494e+00 4.545e-01 -3.287 0.00102 **
## boroughStaten Island -5.251e+00 6.976e-01 -7.527 5.90e-14 ***
## genderMale 7.299e+00 2.662e-01 27.425 < 2e-16 ***
## raceBlack 1.093e+01 4.208e-01 25.978 < 2e-16 ***
## raceLatino/Hispanic 9.027e+00 4.208e-01 21.451 < 2e-16 ***
## raceOther/Unknown -1.380e+00 4.208e-01 -3.278 0.00105 **
## raceWhite 3.628e+00 4.208e-01 8.621 < 2e-16 ***
## mid_income -1.486e-04 1.218e-05 -12.197 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.71 on 6469 degrees of freedom
## Multiple R-squared: 0.2699, Adjusted R-squared: 0.2687
## F-statistic: 239.1 on 10 and 6469 DF, p-value: < 2.2e-16
# Scatter plot of mean hiv_diagnoses against median mid_income, with one point
# per (uhf, year) group, coloured by year. Rows where year is "2011" are
# excluded before aggregating.
income_plot <- income_hiv %>%
  filter(year != "2011") %>%
  group_by(uhf, year) %>%
  # NOTE(review): `sum_hiv` holds a mean, not a sum. The name is kept because
  # it is also the default y-axis label -- confirm which one was intended.
  summarise(
    sum_hiv = mean(hiv_diagnoses),
    mid_in = median(mid_income)
  ) %>%
  # summarise() only peels off the last grouping level; drop the rest so the
  # plotted data is a plain, ungrouped table.
  ungroup() %>%
  ggplot(aes(x = mid_in, y = sum_hiv, color = year)) +
  geom_point() +
  theme_bw() +
  # legend.position must be the lowercase "none" to hide the legend; "None" is
  # not one of the values documented by ggplot2.
  theme(legend.position = "none")

# Convert the static ggplot into an interactive plotly widget.
ggplotly(income_plot)
Income distribution across different neighborhoods
# Point plot of mid_income for every row of income_hiv, split by uhf. The axes
# are flipped so the uhf values run down the vertical axis, and the points are
# semi-transparent so overlapping observations stay visible.
income_dist <- ggplot(income_hiv, aes(x = uhf, y = mid_income)) +
  geom_point(alpha = 0.1) +
  coord_flip() +
  theme_bw()

# Convert the static ggplot into an interactive plotly widget.
ggplotly(income_dist)